Source code for BigDFT.PDoS

"""
This module offers a suite of tools for calculating and analyzing the projected
density of states (PDoS) in molecular systems. It provides functionality to
extract projection information from cubic and linear scaling PDoS calculations,
and compute weights associated with each projection.

Examples:
The module includes two example functions (_example_cubic and _example_linear)
to demonstrate usage with cubic and linear scaling PDoS calculations,
respectively.
"""

# Constants
PROJECTION_THRESHOLD = 0.3


[docs]def cubic_projection_info(log): """ Extracts projection information from a cubic scaling Projected Density of States (PDoS) calculation, as performed in a BigDFT run. This function processes the output log file from a cubic scaling BigDFT calculation to map each atom in the system to its projections, such as 's', 'px', 'py', 'pz', etc. The function is designed to work with the specific structure of BigDFT log files, particularly extracting information from the 'Mulliken Charge Population Analysis' section. Args: log (BigDFT.Logfiles.Logfile): A BigDFT log file object, containing the results of a cubic scaling calculation with the pdos option on. Returns: dict of dict of list: A nested dictionary where the first key is the atomic symbol, and the second key is the projection type. The value is a list of indices, each corresponding to a projection of that type for the atom. For example: {'C': {'s': [0], 'px': [1], 'py': [2], 'pz': [3]}}. """ from BigDFT.Systems import system_from_log from collections import defaultdict sys = system_from_log(log, fragmentation="full") proj_list = defaultdict(lambda: defaultdict(list)) charge_analy = log.log["Mulliken Charge Population Analysis"] i = 0 for at, proj in zip(sys.get_atoms(), charge_analy): for p in proj: if p == "Center Quantities": continue proj_list[at.sym][p].append(i) i += 1 return _defaultdict_to_dict(proj_list)
[docs]def cubic_projection_weights(log, proj_info, log_dir="."): """ Calculates the weights associated with each projection from a cubic scaling Projected Density of States (PDoS) calculation. This function interprets the PDoS data stored in a BigDFT log file to compute the weights of different atomic projections (e.g., 's', 'px', 'py', 'pz') for each orbital. It utilizes the projection information obtained from `cubic_projection_info` and the PDoS data file ('pdos.dat'). Args: log (BigDFT.Logfiles.Logfile): The BigDFT log file object from a cubic scaling PDoS calculation. proj_info (dict): A dictionary mapping atomic symbols to projection types and indices, as returned by `cubic_projection_info`. log_dir (str, optional): The directory where the BigDFT log file is located. Defaults to the current directory ("."). Returns: list of dict: A list where each element corresponds to an orbital. Each element is a dict, mapping atomic types to another dict, which in turn maps projections to their weights. For example, if you want the weight of the first orbital for the 's' projection of Carbon, it would be accessed as weights[0]['C']['s']. """ from os.path import join from collections import defaultdict weights = [] # Map back from index to symbol / projector type reverse_lookup = {} for k, v in proj_info.items(): for p, idx in v.items(): for i in idx: reverse_lookup[i] = (k, p) with open(join(log_dir, log.data_directory, "pdos.dat")) as ifile: for line in ifile: vals = [float(x) for x in line.split()] wdict = defaultdict(lambda: defaultdict(float)) i = 0 for j, w in enumerate(vals): if j not in reverse_lookup: # Sometimes padding continue sym, p = reverse_lookup[j] wdict[sym][p] += w weights.append(wdict) return _defaultdict_to_dict(weights)
[docs]def compute_weight(evec, sks_evec, orb_idx, proj_idx): """ Computes the weight of a specific projection for a given orbital in the projected density of states (PDoS) calculation, typically used in the linear scaling mode. This function calculates the contribution of a specified set of projections (identified by their indices) to a particular orbital, based on the eigenvectors of the system. The function allows for optional inclusion of the overlap matrix (s) and the density matrix (k) if you're doing Mulliken analysis or projecting out virtuals, respectively. Args: evec (numpy.array): Array of eigenvectors, where each column represents an eigenvector of the system. evec (numpy.array): Overlap * Density * Overlap * evec. Overlap allows you to do a Mulliken projection, and Density can be used to filter in energy. If you're doing Lowdin, just use the density. If you don't need to filter in energy, the identity matrix can be used for the density. orb_idx (int): Index of the orbital for which the weight is being computed. proj_idx (list of int): List of indices corresponding to the projections included in the weight calculation. Returns: (float): the calculated weight. """ from numpy import trace # Project seig_l = evec[:, orb_idx:orb_idx+1] seig_r = sks_evec[:, orb_idx:orb_idx+1] return trace(seig_l[proj_idx, :].T @ (seig_r)[proj_idx, :]).item()
[docs]def linear_projection_info(log, thresh=PROJECTION_THRESHOLD): """ Analyze and categorize projection types for each atom in the system based on electrostatic multipole information from a given log file. This function processes the electrostatic multipoles associated with atoms in a system and classifies each support function (such as 's', 'px', 'py', 'pz', 'dxy', etc.) based on their multipole expansion. Args: log (BigDFT.Logfiles.Logfile): The log file object from a BigDFT run. thresh (float): the threshold used for determining the character of a multipole. Returns: dict of dict of list: A nested dictionary where the first key is the atomic symbol, the second key is the projection type, and the value is a list of indices associated with that projection type for the given atom. Example: {'H': {'s': [0]}, 'O': {'px': [1], 'py': [2], 'pz': [3]'}} Raises: ValueError: If the projection type cannot be determined for an atom based on the heuristic criteria. """ def guess_proj_type(atm, thresh): if abs(abs(atm["q1"][0]) - 1) < thresh: ptype = 'py' elif abs(abs(atm["q1"][1]) - 1) < thresh: ptype = 'pz' elif abs(abs(atm["q1"][2]) - 1) < thresh: ptype = 'px' elif abs(abs(atm["q0"][0]) - 1) < thresh: ptype = 's' elif abs(abs(atm["q2"][0]) - 1) < thresh: ptype = 'dxy' elif abs(abs(atm["q2"][1]) - 1) < thresh: ptype = 'dyz' elif abs(abs(atm["q2"][2]) - 1) < thresh: ptype = 'dz^2' elif abs(abs(atm["q2"][3]) - 1) < thresh: ptype = 'dxz' elif abs(abs(atm["q2"][4]) - 1) < thresh: ptype = 'dx^2-y^2' else: raise ValueError("Can't figure out " + str(atm)) return ptype from warnings import warn from collections import defaultdict proj_info = defaultdict(lambda: defaultdict(list)) # Use the multipole information for the compute matching mp = log.electrostatic_multipoles for pole in mp["values"]: pole["units"] = mp["units"] # Get the Multipole Descriptions gsfm = log.log["Gross support functions moments"] mp_info = gsfm["Multipole coefficients"]["values"] for i, atm in enumerate(mp_info): sym = atm["sym"].split("-")[0] ptype = guess_proj_type(atm, thresh) if atm["type"] != "unknown": ltype = atm["type"].replace("_", "") if ltype != ptype: warn("Uncertainty about support function character" + str(atm)) ptype = ltype proj_info[sym][ptype].append(i) return _defaultdict_to_dict(proj_info)
[docs]def linear_projection_weights(evec, proj_info, s, k): """ Computes the weights associated with a set of projections for each eigenvalue in a linear scaling Projected Density of States (PDoS) calculation. This function uses the eigenvectors and eigenvalues of a system, along with projection information (such as atomic symbols and projection types), to calculate the contribution of these projections to each eigenvalue. The function allows for inclusion of the overlap matrix (s) and the density matrix (k) if you're doing Mulliken analysis or projecting out virtuals, respectively. If you're doing Lowdin, pass the Lowdin orthogonalized eigenvectors and the identity matrix for s. You're doing Mulliken, and don't want to project out virtuals, pass the overlap matrix as K. Args: evec (numpy.array): Array of eigenvectors, where each column represents an eigenvector of the system. proj_info (dict): Dictionary mapping atom symbols to projections, and projections to indices, as obtained from `linear_projection_info`. s (numpy.array): Overlap matrix, used in the calculation of the weights k (numpy.array): Density matrix, used in the calculation of the weights Returns: list of dict: A list where each element corresponds to an eigenvalue. Each element is a dict, mapping atomic types to another dict, which in turn maps projections to their computed weights. For example, weights[0]['C']['s'] would access the weight of 's' projection for Carbon for the first eigenvalue. """ from collections import defaultdict sks = s @ k @ s sks_evec = sks @ evec weights = [] for i in range(evec.shape[1]): wdict = defaultdict(lambda: defaultdict(float)) for atm, proj in proj_info.items(): for p in proj: wdict[atm][p] += compute_weight(evec, sks_evec, i, proj_info[atm][p]) weights.append(wdict) return _defaultdict_to_dict(weights)
[docs]def compute_fragment_weights(sys, log, evec, s, k): """ Computes the weights associated with fragments in a system for each eigenvalue for a Fragment Projected Density of States (PDoS) calculation. Args: sys (BigDFT.Systems.System): The system object as defined in BigDFT, representing the fragments of the system. log (BigDFT.Logfiles.Logfile): The log file object from a BigDFT run computed in the linear scaling mode. evec (numpy.array): Array of eigenvectors, where each column represents an eigenvector of the system. s (numpy.array): Overlap matrix, used in the calculation of the weights k (numpy.array): Density matrix, used in the calculation of the weights Returns: list of dict: A list where each element corresponds to an eigenvalue. Each element is a dict, mapping fragment identifiers to their computed weights. For example, weights[0]['HOH:1'] accesses the weight of fragment 'HOH:1' for the first eigenvalue. """ from BigDFT.PostProcessing import BigDFTool tool = BigDFTool() fidx = tool.get_frag_indices(sys, log) weights = linear_projection_weights(evec, {"frag": fidx}, s=s, k=k) # Eliminate intermediate dictionary for i, v in enumerate(weights): weights[i] = v["frag"] return _defaultdict_to_dict(weights)
[docs]def compute_SFs_weights(evec, metadata, mapping): """ Computes the support functions weights from the mapped indices Args: evec (numpy.array): Array of eigenvectors metadata (BigDFT.Spillage.MatrixMetadata): The information on the matrices indexing mapping (dict): The instruction to fold the eigenvectors Returns: list of dict: A list where each element corresponds to an eigenvalue. Each element is a dict, mapping SFs to their computed weights. """ from numpy import array, sum id_mat = [i.get('indices') for i in metadata.atoms] id_sym = [i.get('sym') for i in metadata.atoms] weights = [{k: [] for k, v in mapping.items()} for _ in range(len(evec))] evec2 = evec**2 for k, v in mapping.items(): id_k = array([id_mat[i] for i, ai in enumerate(id_sym) if ai == k]) wg_k = array([sum(evec2[id_k[:, vi].flatten(), :], 0) for vi in v]) [weights[i].update({k: [float(w_ii)for w_ii in w_i]}) for i, w_i in enumerate(wg_k.T)] return weights
def _defaultdict_to_dict(d): """ Recursively converts a nested defaultdict or a list of defaultdicts to a nested dict or a list of dicts, respectively. Args: d (defaultdict, dict, list): A defaultdict, a standard dict, or a list. The defaultdict may be nested and the list may contain defaultdicts or nested defaultdicts. Returns: dict or list: A standard dict or a list of dicts with all nested defaultdicts converted into dicts. If the input is a standard dict or a list containing standard dicts, it returns the input as-is. """ from collections import defaultdict if isinstance(d, defaultdict): return {k: _defaultdict_to_dict(v) for k, v in d.items()} elif isinstance(d, dict): return {k: _defaultdict_to_dict(v) for k, v in d.items()} elif isinstance(d, list): return [_defaultdict_to_dict(v) for v in d] return d
[docs]def _example_cubic(): # System from BigDFT.Database.Molecules import get_molecule sys = get_molecule("H2O") # Input file from BigDFT.Inputfiles import Inputfile inp = Inputfile() inp.set_hgrid(0.4) inp.set_xc("PBE") inp.calculate_pdos() # Calculate from BigDFT.Calculators import SystemCalculator code = SystemCalculator(skip=True, verbose=False) log = code.run(sys=sys, input=inp, name="pdos-C") # Get weights proj_list = cubic_projection_info(log) weights = cubic_projection_weights(log, proj_list) print(weights[0]["O"]["s"]) print(weights[1]["H"]["s"])
[docs]def _example_linear(): # System from BigDFT.Database.Molecules import get_molecule sys = get_molecule("H2O") # Input file from BigDFT.Inputfiles import Inputfile inp = Inputfile() inp.set_hgrid(0.4) inp.set_xc("PBE") inp["import"] = "linear" inp["lin_general"] = {"support_function_multipoles": True} from BigDFT.Calculators import SystemCalculator code = SystemCalculator(skip=True, verbose=False) log = code.run(sys=sys, input=inp, name="pdos-L") # Read the matrices from BigDFT.PostProcessing import BigDFTool tool = BigDFTool() h = tool.get_matrix_h(log) s = tool.get_matrix_s(log) k = tool.get_matrix_k(log) # Diagonalize from scipy.linalg import eigh evals, evec = eigh(h.todense(), b=s.todense()) # Get the weights proj_info = linear_projection_info(log) weights = linear_projection_weights(evec, proj_info, s=s, k=0.5 * k) print(weights[0]["O"]["s"]) print(weights[1]["H"]["s"])
if __name__ == "__main__": _example_linear() _example_cubic()